import json
from m_y.m_y_scrapy.items import DB_Film
import scrapy
from scrapy import FormRequest
from scrapy.loader import ItemLoader



class D_BSpider(scrapy.Spider):
    """Spider that walks a Douban movie listing and scrapes each film page.

    Flow:
      1. ``start_requests`` requests the JSON search endpoint(s).
      2. ``parse`` reads the ``subjects`` array from the JSON body and
         follows every film's ``url``.
      3. ``parse_film`` combines values from the listing entry with values
         extracted from the film page into a ``DB_Film`` item.
    """

    name = 'douban.scrapy'

    def start_requests(self):
        """Yield one request per seed URL, each handled by ``parse``."""
        # Top 100 trending movies; the type, tag, sort, page_limit and
        # page_start query parameters can all be changed.
        urls = ['https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=100&page_start=0']

        # Yield inside the loop instead of returning on the first iteration,
        # so every entry of ``urls`` is requested and an empty list simply
        # produces no requests (rather than returning None).
        for url in urls:
            yield FormRequest(url, callback=self.parse)

    def parse(self, response):
        """Follow the detail page of every film in the JSON listing.

        Args:
            response: Response whose body is a JSON object; the film entries
                are read from its ``subjects`` key.

        Yields:
            One request per film that has a ``url``, with the listing entry
            forwarded to ``parse_film`` as the ``film`` keyword argument.
        """
        # A missing or null 'subjects' is treated as an empty listing
        # instead of failing with TypeError when iterated.
        film_list = json.loads(response.body).get('subjects') or []
        for film in film_list:
            film_url = film.get('url')
            if not film_url:
                # response.follow() rejects a None url; skip the entry
                # rather than aborting the rest of the listing.
                self.logger.warning('Skipping film without a url: %r', film)
                continue
            yield response.follow(
                film_url,
                self.parse_film,
                cb_kwargs={'film': film},
            )

    def parse_film(self, response, film):
        """Build a ``DB_Film`` item from a film page and its listing entry.

        Args:
            response: Response for the film's detail page.
            film: The listing entry (dict) that led to this page; supplies
                ``title``, ``url``, ``rate``, ``cover`` and ``id``.

        Yields:
            The loaded ``DB_Film`` item.
        """
        item_loader = ItemLoader(item=DB_Film(), response=response)

        # Values taken from the JSON listing entry.
        item_loader.add_value('film_name', film.get('title'))

        # Values extracted from the page's "info" / "link-report" elements.
        # NOTE(review): 'daoyan' / 'zhuyan' presumably mean director / lead
        # actors, and the XPaths depend on the span order inside #info --
        # confirm against the live page markup.
        item_loader.add_value(
            'daoyan',
            response.xpath('//*[@id="info"]/span[1]/span[2]/a/text()').get(),
        )
        item_loader.add_value(
            'film_intrduce',
            response.xpath('//*[@id="link-report"]/span/text()').getall(),
        )
        item_loader.add_value(
            'zhuyan',
            response.xpath('//*[@id="info"]/span[3]/span[2]/descendant::a/text()').getall(),
        )

        item_loader.add_value('url', film.get('url'))
        item_loader.add_value('rate', film.get('rate'))
        item_loader.add_value('cover', film.get('cover'))
        item_loader.add_value('id', film.get('id'))

        yield item_loader.load_item()



